Data Loading

# Fetch the combined GPS master table from the GitHub raw endpoint
gps_data <- read_csv(
  "https://raw.githubusercontent.com/tomaszbielNCI/kml_procesor/master/data/output/gps_master.csv"
)

# Merge the two timestamp sources into one parsed `time` column
gps_data <- gps_data %>%
  # Step 1: build candidate timestamp strings
  mutate(
    # Clock values from GPX extensions (HH:MM:SS) get the route date prepended
    point_clock_time = ifelse(
      !is.na(point_clock) & point_clock != "no_time",
      paste(route_date, point_clock),
      NA
    ),
    # Keep `time` only when it is neither the placeholder nor a bare clock value
    time_standard = ifelse(
      !grepl("^[0-9]{2}:[0-9]{2}:[0-9]{2}$", time) & time != "no_time",
      time,
      NA
    ),
    # First non-missing candidate wins: full timestamp, then date + clock
    time_clean = coalesce(time_standard, point_clock_time)
  ) %>%
  # Step 2: parse the string and derive calendar fields from it
  mutate(
    time = ymd_hms(time_clean),
    date = date(time),
    hour = hour(time),
    month = month(time, label = TRUE),
    file_type = factor(file_type)
  )

# Basic statistics
# Headline counts for the loaded table. Lines starting with "##" are the
# output recorded when the report was rendered.
cat("Number of GPS points:", nrow(gps_data), "\n")
## Number of GPS points: 74820
cat("Number of unique routes:", n_distinct(gps_data$track_name), "\n")
## Number of unique routes: 35
# cat() drops the Date class and prints the underlying day count
# (e.g. 20211), so the dates are converted to ISO strings with format() first.
cat("Date range:", format(min(gps_data$date, na.rm = TRUE)), "-",
    format(max(gps_data$date, na.rm = TRUE)), "\n")
## Date range: 2025-05-03 - 2025-06-08
cat("Points with time:", sum(!is.na(gps_data$time)), "\n")
## Points with time: 3492
cat("Points with clock:", sum(!is.na(gps_data$point_clock)), "\n")
## Points with clock: 71328

1. Altitude Histogram

# Histogram of point altitudes with the overall mean marked by a dashed red
# line. The mean is passed as a constant: mapping it inside aes() evaluates it
# for every row and draws one overlapping line per GPS point.
ggplot(gps_data, aes(x = altitude)) +
  geom_histogram(bins = 50, fill = "skyblue", alpha = 0.7, color = "black") +
  labs(title = "Distribution of GPS Point Altitudes",
       x = "Altitude (m above sea level)", y = "Number of Points") +
  # `linewidth` replaces the deprecated `size` for line geoms
  geom_vline(xintercept = mean(gps_data$altitude, na.rm = TRUE),
             color = "red", linetype = "dashed", linewidth = 1)


2. Altitude Boxplot by Routes

# Names of the ten routes holding the most GPS points
top_routes <- count(gps_data, track_name, sort = TRUE) %>%
  slice_head(n = 10) %>%
  pull(track_name)

# Horizontal altitude boxplots for those routes, ordered by median altitude
ggplot(
  data = filter(gps_data, track_name %in% top_routes),
  mapping = aes(x = reorder(track_name, altitude, FUN = median), y = altitude)
) +
  geom_boxplot(alpha = 0.7, fill = "lightgreen") +
  labs(
    title = "Altitude Distribution for Top 10 Routes",
    x = "Route",
    y = "Altitude (m above sea level)"
  ) +
  coord_flip()


3. Latitude Distribution

# Kernel density of point latitudes
gps_data %>%
  ggplot(mapping = aes(x = latitude)) +
  geom_density(alpha = 0.6, fill = "orange") +
  ggtitle("Density Distribution of Latitude") +
  xlab("Latitude") +
  ylab("Density")


4. Longitude Distribution

# Kernel density of point longitudes
gps_data %>%
  ggplot(mapping = aes(x = longitude)) +
  geom_density(alpha = 0.6, fill = "purple") +
  ggtitle("Density Distribution of Longitude") +
  xlab("Longitude") +
  ylab("Density")


5. Altitude vs Latitude

# Altitude against latitude: raw points plus a loess trend with its
# confidence band
gps_data %>%
  ggplot(mapping = aes(x = latitude, y = altitude)) +
  geom_point(colour = "blue", alpha = 0.3) +
  geom_smooth(method = "loess", se = TRUE, colour = "red") +
  ggtitle("Altitude vs Latitude Relationship") +
  xlab("Latitude") +
  ylab("Altitude (m above sea level)")


6. Speed Distribution

# Calculate speed from position changes between consecutive points of each
# route. Rows without a parsed `time` yield NA speeds and are dropped below.
speed_data <- gps_data %>%
  arrange(track_name, time) %>%
  group_by(track_name) %>%
  mutate(
    lat_diff = latitude - lag(latitude),
    lon_diff = longitude - lag(longitude),
    time_diff = as.numeric(difftime(time, lag(time), units = "secs")),
    # Equirectangular approximation (~111 km per degree). A degree of
    # longitude shrinks with cos(latitude), so the east-west component is
    # scaled accordingly; without it distances are overstated away from the
    # equator.
    distance_km = sqrt(
      lat_diff^2 + (lon_diff * cos(latitude * pi / 180))^2
    ) * 111,
    # Speed is undefined for the first point of a route and for zero or
    # negative time gaps
    speed_kmh = ifelse(time_diff > 0, (distance_km / time_diff) * 3600, NA)
  ) %>%
  # Drop undefined speeds and values of 150 km/h or more
  filter(!is.na(speed_kmh) & speed_kmh < 150) %>%
  ungroup()

# Histogram of the computed speeds with the mean marked by a dashed red line.
# The mean is passed as a constant so a single line is drawn rather than one
# per row.
ggplot(speed_data, aes(x = speed_kmh)) +
  geom_histogram(bins = 50, fill = "darkgreen", alpha = 0.7) +
  labs(title = "Speed Distribution Across All Routes",
       x = "Speed (km/h)", y = "Number of Points") +
  geom_vline(xintercept = mean(speed_data$speed_kmh, na.rm = TRUE),
             color = "red", linetype = "dashed", linewidth = 1)


7. Time Distribution of Points (Hours)

# Number of GPS points recorded in each hour of the day.
# One-hour bins centred on whole hours are requested explicitly: `bins = 24`
# derives the bin width from the observed range, so the bars only line up
# with the hours when the data happen to span 0-23.
gps_data %>%
  filter(!is.na(hour)) %>%
  ggplot(aes(x = hour)) +
  geom_histogram(binwidth = 1, center = 0,
                 fill = "gold", alpha = 0.7, color = "black") +
  labs(title = "Distribution of GPS Points by Hour of Day",
       x = "Hour", y = "Number of Points") +
  scale_x_continuous(breaks = seq(0, 24, 2))


8. Points per Route

# Point counts per route, keeping the fifteen routes with the most points
route_counts <- slice_head(
  count(gps_data, track_name, sort = TRUE),
  n = 15
)

# Horizontal bar chart, bars ordered by point count
route_counts %>%
  ggplot(mapping = aes(x = reorder(track_name, n), y = n)) +
  geom_col(alpha = 0.8, fill = "coral") +
  ggtitle("GPS Points for 15 Longest Routes") +
  xlab("Route") +
  ylab("Number of Points") +
  coord_flip()


9. Average Altitude per Route

# Mean altitude per route, keeping the fifteen highest means
route_elevation <- gps_data %>%
  group_by(track_name) %>%
  summarise(
    mean_elevation = mean(altitude, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_elevation)) %>%
  slice_head(n = 15)

# Horizontal bar chart, bars ordered by mean altitude
route_elevation %>%
  ggplot(
    mapping = aes(
      x = reorder(track_name, mean_elevation),
      y = mean_elevation
    )
  ) +
  geom_col(alpha = 0.8, fill = "brown") +
  ggtitle("Average Altitude for 15 Highest Routes") +
  xlab("Route") +
  ylab("Average Altitude (m above sea level)") +
  coord_flip()


10. GPS Points Scatter Map

# Plot every GPS point by longitude and latitude.
# coord_quickmap() fixes the aspect ratio so that a degree of longitude is
# drawn narrower than a degree of latitude, as appropriate for the latitude
# of the data; the default Cartesian coordinates stretch the panel to fit the
# plotting area and distort the map.
ggplot(gps_data, aes(x = longitude, y = latitude)) +
  geom_point(alpha = 0.4, color = "navy", size = 0.5) +
  coord_quickmap() +
  labs(title = "GPS Points Scatter Map",
       x = "Longitude", y = "Latitude") +
  theme_minimal()


11. Altitude vs Time (for time-series data)

# Altitude over time, one line per route, drawn from a random sample of the
# points that have a parsed timestamp (sampling keeps rendering fast)
gps_data %>%
  filter(!is.na(time)) %>%
  slice_sample(n = 5000) %>%
  ggplot(mapping = aes(x = time, y = altitude, colour = track_name)) +
  geom_line(alpha = 0.7) +
  ggtitle("Altitude Changes Over Time") +
  xlab("Time") +
  ylab("Altitude (m above sea level)") +
  # One colour per route would make the legend unwieldy, so it is hidden
  theme(legend.position = "none")


12. Altitude Statistics

# Altitude per file type shown three ways at once: a violin for the shape,
# a narrow boxplot for the quartiles, and jittered raw points on top

ggplot(
  data = gps_data,
  mapping = aes(x = file_type, y = altitude, fill = file_type)
) +
  geom_violin(trim = TRUE, alpha = 0.5, show.legend = FALSE) +
  # Outliers are hidden here because the jitter layer already shows every point
  geom_boxplot(
    width = 0.2,
    alpha = 0.8,
    colour = "black",
    outlier.shape = NA,
    show.legend = FALSE
  ) +
  geom_jitter(
    width = 0.1,
    size = 0.8,
    alpha = 0.2,
    colour = "gray30",
    show.legend = FALSE
  ) +
  # Only a colour for the "gpx" level is defined
  scale_fill_manual(values = c("gpx" = "#2c7bb6")) +
  labs(
    x = NULL,
    y = "Altitude (m above sea level)",
    title = "Altitude distribution",
    subtitle = "Only GPX files present in the dataset"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray30"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.3)
  )


13. 2D Point Density (Contour)

# Filled 2D kernel-density contours of point positions
gps_data %>%
  ggplot(mapping = aes(x = longitude, y = latitude)) +
  geom_density_2d_filled(alpha = 0.8) +
  ggtitle("GPS Points Density (2D Contour)") +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_minimal()


14. Geographic Range of Each Route

# Per-route extent in degrees: latitude span plus longitude span, keeping the
# fifteen routes with the largest combined span
route_bounds <- gps_data %>%
  group_by(track_name) %>%
  summarise(
    lat_range = diff(range(latitude, na.rm = TRUE)),
    lon_range = diff(range(longitude, na.rm = TRUE)),
    total_range = lat_range + lon_range,
    .groups = "drop"
  ) %>%
  arrange(desc(total_range)) %>%
  slice_head(n = 15)

# Horizontal bar chart, bars ordered by combined span
route_bounds %>%
  ggplot(
    mapping = aes(x = reorder(track_name, total_range), y = total_range)
  ) +
  geom_col(alpha = 0.8, fill = "darkblue") +
  ggtitle("Geographic Range of 15 Largest Routes") +
  xlab("Route") +
  ylab("Geographic Range (degrees)") +
  coord_flip()


15. Correlation Between Coordinates

# Latitude against longitude for a random sample of points, with a linear
# fit and its confidence band.
# slice_sample() replaces the superseded sample_n(): it returns all rows when
# fewer than 10000 are available instead of raising an error, and matches the
# sampling calls used elsewhere in this report.
gps_data %>%
  slice_sample(n = 10000) %>%
  ggplot(aes(x = latitude, y = longitude)) +
  geom_point(alpha = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue", se = TRUE) +
  labs(title = "Correlation Between Latitude and Longitude",
       x = "Latitude", y = "Longitude")


16. Altitude Distribution with Quartiles

# Altitude histogram with the three quartiles marked by coloured dashed lines.
# The quartiles are computed once up front: mapping quantile() inside aes()
# evaluates it per row and draws one overlapping line for every GPS point.
quartile_colours <- c("orange", "red", "purple")
altitude_quartiles <- unname(
  quantile(gps_data$altitude, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
)

ggplot(gps_data, aes(x = altitude)) +
  geom_histogram(bins = 50, fill = "lightblue", alpha = 0.7) +
  geom_vline(xintercept = altitude_quartiles[1],
             color = quartile_colours[1], linetype = "dashed", linewidth = 1) +
  geom_vline(xintercept = altitude_quartiles[2],
             color = quartile_colours[2], linetype = "dashed", linewidth = 1) +
  geom_vline(xintercept = altitude_quartiles[3],
             color = quartile_colours[3], linetype = "dashed", linewidth = 1) +
  labs(title = "Altitude Distribution with Quartile Markers",
       x = "Altitude (m above sea level)", y = "Number of Points") +
  # One label per quartile, placed at the top of its own line in the matching
  # colour. A single label with three colours is drawn three times on top of
  # itself, so only the last colour would be visible.
  annotate("text", x = altitude_quartiles, y = Inf,
           label = c("Q1", "Q2", "Q3"),
           hjust = -0.2, vjust = 1.5,
           color = quartile_colours)


17. Route Statistics Summary Table

# Per-route summary: point count, altitude statistics, mean of the
# route-level calories and distance columns, and the file type of the first
# point. Only the twenty routes with the most points are kept.
route_stats <- gps_data %>%
  group_by(track_name) %>%
  summarise(
    points = n(),
    avg_altitude = round(mean(altitude, na.rm = TRUE), digits = 1),
    min_altitude = round(min(altitude, na.rm = TRUE), digits = 1),
    max_altitude = round(max(altitude, na.rm = TRUE), digits = 1),
    # Built from the already-rounded extremes computed just above
    altitude_range = round(max_altitude - min_altitude, digits = 1),
    calories = round(mean(route_total_calories, na.rm = TRUE)),
    distance_km = round(mean(route_distance_km, na.rm = TRUE), digits = 2),
    file_type = first(file_type),
    .groups = "drop"
  ) %>%
  arrange(desc(points)) %>%
  slice_head(n = 20)

# Interactive, horizontally scrollable table showing ten rows per page
route_stats %>%
  DT::datatable(
    caption = "Statistics for 20 Largest Routes",
    options = list(pageLength = 10, scrollX = TRUE)
  )

18. Time Analysis from GPX Extensions

# Data with clock time: keep points that have a `point_clock` value and
# express `point_seconds` in minutes
time_analysis <- gps_data %>%
  filter(!is.na(point_clock)) %>%
  mutate(
    time_seconds = as.numeric(point_seconds),
    time_minutes = time_seconds / 60
  )

# Histogram of those minute values with the mean marked by a dashed red line.
# The mean is passed as a constant so a single line is drawn rather than one
# per row; `linewidth` replaces the deprecated `size` for line geoms.
ggplot(time_analysis, aes(x = time_minutes)) +
  geom_histogram(bins = 50, fill = "lightgreen", alpha = 0.7) +
  labs(title = "Distribution of Route Time (minutes from start)",
       x = "Time (minutes)", y = "Number of Points") +
  geom_vline(xintercept = mean(time_analysis$time_minutes, na.rm = TRUE),
             color = "red", linetype = "dashed", linewidth = 1)


19. Valleymount to Blessington - Time vs Speed Analysis (Anscombe’s Quartet)

# Filter Valleymount to Blessington route and derive a per-point speed from
# consecutive positions and `point_seconds`
valleymount_route <- gps_data %>%
  filter(track_name == "valleymount to blessington  12/10/2025 09:22") %>%
  filter(!is.na(point_seconds)) %>%
  mutate(
    time_minutes = as.numeric(point_seconds) / 60,
    # Calculate speed from position changes
    lat_diff = latitude - lag(latitude),
    lon_diff = longitude - lag(longitude),
    time_diff = as.numeric(point_seconds) - lag(as.numeric(point_seconds)),
    # Equirectangular approximation (~111 km per degree). A degree of
    # longitude shrinks with cos(latitude), so the east-west component is
    # scaled accordingly; without it distances are overstated.
    distance_km = sqrt(
      lat_diff^2 + (lon_diff * cos(latitude * pi / 180))^2
    ) * 111,
    # Speed is undefined when no time elapsed between two points; NA is used
    # (as in the other speed calculations of this report) rather than a
    # made-up 0 km/h
    speed_kmh = ifelse(time_diff > 0, (distance_km / time_diff) * 3600, NA),
    speed_kmh = ifelse(speed_kmh > 150, NA, speed_kmh)  # Remove unrealistic speeds
  ) %>%
  filter(!is.na(speed_kmh))

# Time vs Speed scatter plot with regression line; the R² of the same linear
# model is printed in the top-right corner
ggplot(valleymount_route, aes(x = time_minutes, y = speed_kmh)) +
  geom_point(alpha = 0.6, color = "orange") +
  geom_smooth(method = "lm", color = "red", se = TRUE, linewidth = 1) +
  labs(title = "Valleymount to Blessington - Time vs Speed Analysis",
       subtitle = "Showing transition from walking to car journey",
       x = "Time (minutes)", y = "Speed (km/h)") +
  theme_minimal() +
  annotate("text", x = Inf, y = Inf, 
           label = paste("R² =", round(summary(lm(speed_kmh ~ time_minutes, data = valleymount_route))$r.squared, 3)),
           hjust = 1.1, vjust = 1.1, color = "red")


20. Valleymount to Blessington - Speed Phases Analysis

# Split the route by row position: the first 75% of rows are labelled as
# walking, the last 25% as car
walking_phase <- head(
  valleymount_route,
  round(nrow(valleymount_route) * 0.75)
)
car_phase <- tail(
  valleymount_route,
  round(nrow(valleymount_route) * 0.25)
)

# Stack both parts with a `phase` label for plotting
speed_analysis <- bind_rows(
  mutate(walking_phase, phase = "Walking"),
  mutate(car_phase, phase = "Car")
)

# Boxplot of speed per phase with the individual points jittered on top
speed_analysis %>%
  ggplot(mapping = aes(x = phase, y = speed_kmh, fill = phase)) +
  geom_boxplot(alpha = 0.7) +
  geom_jitter(alpha = 0.3, width = 0.2) +
  scale_fill_manual(
    values = c("Walking" = "lightblue", "Car" = "lightcoral")
  ) +
  labs(
    x = "Journey Phase",
    y = "Speed (km/h)",
    title = "Speed Comparison: Walking vs Car Phases",
    subtitle = "Valleymount to Blessington Route"
  ) +
  theme_minimal() +
  theme(legend.position = "none")


21. Valleymount to Blessington - Speed Timeline with Transition

# The transition marker sits at the latest minute value in the walking part
transition_time <- max(walking_phase$time_minutes)

# Speed over time as a line with points; a dashed red line and a rotated
# label mark the transition
valleymount_route %>%
  ggplot(mapping = aes(x = time_minutes, y = speed_kmh)) +
  geom_line(colour = "steelblue", alpha = 0.7) +
  geom_point(colour = "steelblue", alpha = 0.5) +
  geom_vline(
    xintercept = transition_time,
    colour = "red",
    linetype = "dashed",
    linewidth = 1.5
  ) +
  annotate(
    "text",
    x = transition_time,
    # Label placed at 90% of the highest observed speed
    y = max(valleymount_route$speed_kmh, na.rm = TRUE) * 0.9,
    label = "Transition to Car",
    angle = 90,
    vjust = -0.5,
    colour = "red"
  ) +
  labs(
    x = "Time (minutes)",
    y = "Speed (km/h)",
    title = "Speed Timeline - Valleymount to Blessington Route",
    subtitle = "Clear transition point where walking changes to car journey"
  ) +
  theme_minimal()


22. Violin Plot of Speed by Route Type

# Calculate speed for all data first, from consecutive positions within each
# route. Rows without a parsed `time` yield NA speeds and are dropped below.
gps_data_with_speed <- gps_data %>%
  arrange(track_name, time) %>%
  group_by(track_name) %>%
  mutate(
    lat_diff = latitude - lag(latitude),
    lon_diff = longitude - lag(longitude),
    time_diff = as.numeric(difftime(time, lag(time), units = "secs")),
    # Equirectangular approximation (~111 km per degree). A degree of
    # longitude shrinks with cos(latitude), so the east-west component is
    # scaled accordingly; without it distances are overstated.
    distance_km = sqrt(
      lat_diff^2 + (lon_diff * cos(latitude * pi / 180))^2
    ) * 111,
    speed_kmh = ifelse(time_diff > 0, (distance_km / time_diff) * 3600, NA)
  ) %>%
  # Drop undefined speeds and values of 150 km/h or more
  filter(!is.na(speed_kmh) & speed_kmh < 150) %>%
  ungroup()

# Categorize routes by type (hiking vs transport) using keywords in the
# route name; the first matching rule wins
route_categories <- gps_data_with_speed %>%
  mutate(
    route_type = case_when(
      grepl("walk|hiking|mountain", track_name, ignore.case = TRUE) ~ "Hiking",
      grepl("car|drive|blessington", track_name, ignore.case = TRUE) ~ "Transport",
      TRUE ~ "Other"
    )
  )

# Violin of speed per route type with a narrow boxplot overlaid
ggplot(route_categories, aes(x = route_type, y = speed_kmh, fill = route_type)) +
  geom_violin(alpha = 0.7) +
  geom_boxplot(width = 0.1, alpha = 0.8, color = "black") +
  labs(title = "Speed Distribution by Route Type",
       x = "Route Type", y = "Speed (km/h)") +
  scale_fill_manual(values = c("Hiking" = "lightgreen", "Transport" = "lightcoral", "Other" = "lightblue")) +
  theme_minimal()


24. Bubble Plot - Distance vs Points vs Calories

# One row per route: mean of the route-level distance and calories columns
# plus the number of points; routes missing either mean are dropped
bubble_data <- gps_data %>%
  group_by(track_name) %>%
  summarise(
    distance = mean(route_distance_km, na.rm = TRUE),
    points = n(),
    calories = mean(route_total_calories, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(!is.na(distance), !is.na(calories))

# Distance against point count; bubble size and colour both encode calories
bubble_data %>%
  ggplot(
    mapping = aes(
      x = distance,
      y = points,
      size = calories,
      colour = calories
    )
  ) +
  geom_point(alpha = 0.7) +
  scale_size_continuous(name = "Calories", range = c(3, 15)) +
  scale_color_gradient(name = "Calories", low = "lightblue", high = "darkred") +
  ggtitle("Bubble Plot: Distance vs Points vs Calories") +
  xlab("Distance (km)") +
  ylab("Number of Points") +
  theme_minimal()


25. Stacked Bar Chart - File Types by Route

# Point counts per route and file type, keeping the ten largest routes within
# each file type; routes are then ordered by their total count for plotting
stacked_data <- count(gps_data, track_name, file_type) %>%
  group_by(file_type) %>%
  # Rows are sorted by count first; slice_head() then takes the first ten
  # rows of each file type
  arrange(desc(n)) %>%
  slice_head(n = 10) %>%
  ungroup() %>%
  mutate(track_name = fct_reorder(track_name, n, sum, .desc = FALSE))

# Horizontal stacked bars, one bar per route, filled by file type
stacked_data %>%
  ggplot(mapping = aes(x = track_name, y = n, fill = file_type)) +
  geom_col(width = 0.7, position = "stack") +
  scale_fill_viridis_d(option = "plasma", begin = 0.2, end = 0.8) +
  labs(
    x = "",
    y = "Number of Points",
    fill = "File Type",
    title = "GPS Points per Route by File Type",
    subtitle = "Top 10 routes for each file type"
  ) +
  coord_flip() +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14),
    axis.text.y = element_text(size = 12),
    plot.title = element_text(face = "bold", size = 18, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray30"),
    legend.position = "bottom",
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )


26. Heat Map - Activity by Hour and Day

# heat map – activity intensity by hour and day of week
# Counts points per weekday/hour cell for rows that have a parsed timestamp.
heat_data <- gps_data %>%
  filter(!is.na(hour) & !is.na(date)) %>%
  mutate(day = wday(date, label = TRUE, week_start = 1)) %>%  # Monday first
  count(day, hour, .drop = FALSE) %>%
  # Fill the whole 7 x 24 grid with zeros. Listing the hours explicitly is
  # needed because complete(day, hour) only expands over hours that already
  # occur in the data, leaving blank columns for the rest.
  complete(day, hour = 0:23, fill = list(n = 0))

ggplot(heat_data, aes(x = hour, y = day, fill = n)) +
  geom_tile(color = "white", linewidth = 0.2) +
  scale_fill_viridis_c(
    option = "plasma",
    direction = -1,
    name = "Number of\npoints",
    breaks = scales::pretty_breaks(n = 5)
  ) +
  scale_x_continuous(
    breaks = seq(0, 23, 2),
    expand = c(0, 0)
  ) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(
    title = "GPS recording activity",
    subtitle = "Number of GPS points recorded per hour and day of week",
    x = "Hour of day", y = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 18, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray30"),
    axis.title.x = element_text(size = 14),
    axis.text = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    panel.grid = element_blank(),
    legend.position = "bottom",
    legend.key.width = unit(2, "cm"),
    legend.key.height = unit(0.4, "cm"),
    legend.title = element_text(size = 11, vjust = 0.8),
    legend.text = element_text(size = 10),
    plot.margin = margin(10, 10, 10, 10)
  )


27. Trellis Plot - Altitude by Route

# Trellis (facet) plot – altitude profiles for top routes

# Define top 10 routes by number of points, unless a non-empty `top_routes`
# already exists in the session
if (!exists("top_routes") || length(top_routes) == 0) {
  top_routes <- gps_data %>%
    count(track_name, sort = TRUE) %>%
    slice_head(n = 10) %>%
    pull(track_name)
}

# Prepare data: use absolute time if available, otherwise relative time (point_seconds),
# or simply point index as a last resort.
trellis_data <- gps_data %>%
  filter(track_name %in% top_routes, !is.na(altitude)) %>%
  arrange(track_name, time) %>%
  group_by(track_name) %>%
  mutate(
    x_axis = case_when(
      !is.na(time) ~ as.numeric(difftime(time, min(time, na.rm = TRUE), units = "mins")),
      # as.numeric() mirrors how point_seconds is handled in the other
      # sections of this report
      !is.na(point_seconds) ~ as.numeric(point_seconds) / 60,   # point_seconds = seconds from start
      # Cast the integer row index so every branch yields a double; older
      # dplyr versions reject mixed integer/double branches in case_when()
      TRUE ~ as.numeric(row_number())
    ),
    # Records which of the three sources produced x_axis for each row
    x_source = case_when(
      !is.na(time) ~ "absolute time",
      !is.na(point_seconds) ~ "relative time",
      TRUE ~ "point index"
    )
  ) %>%
  slice_sample(n = 200) %>%   # at most 200 points per route (all if fewer)
  ungroup()

if (nrow(trellis_data) > 0) {
  
  # Determine a descriptive subtitle based on the actual time source
  source_note <- if (all(trellis_data$x_source == "absolute time")) {
    "based on absolute time"
  } else if (any(trellis_data$x_source == "relative time")) {
    "based on relative time (point_seconds)"
  } else {
    "based on point index (no time data available)"
  }
  
  # One panel per route, each with its own altitude scale
  ggplot(trellis_data, aes(x = x_axis, y = altitude)) +
    geom_line(alpha = 0.6, color = "#2c3e50", linewidth = 0.5) +
    facet_wrap(~track_name, scales = "free_y", ncol = 2) +
    labs(
      title = "Altitude profiles by route",
      subtitle = paste("Each panel shows a different route (top 10 by number of points) –", source_note),
      # Axis title follows the same source that the subtitle reports
      x = if (grepl("absolute", source_note)) "Time from start (minutes)" else
           if (grepl("relative", source_note)) "Relative time (minutes)" else "Point index",
      y = "Altitude (m)"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(face = "bold", size = 18, hjust = 0.5),
      plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray30"),
      axis.title = element_text(size = 14),
      axis.text = element_text(size = 10),
      strip.text = element_text(size = 12, face = "bold"),
      panel.grid.minor = element_blank(),
      panel.spacing = unit(1, "lines"),
      plot.margin = margin(10, 10, 10, 10)
    )
  
} else {
  message("No data available for trellis plot – check if top routes contain valid altitude or time information.")
}


28. Strip Chart with Means

# Strip chart with means
# One row per route: the mean of the route-level calories column, keeping the
# fifteen routes with the highest mean.
strip_data <- gps_data %>%
  filter(!is.na(route_total_calories)) %>%
  group_by(track_name) %>%
  summarise(
    calories = mean(route_total_calories, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  arrange(desc(calories)) %>%
  slice_head(n = 15)  # same helper as the other top-N selections in this report

ggplot(strip_data, aes(x = track_name, y = calories)) +
  # height = 0 keeps the plotted calorie values exact: by default
  # geom_jitter() also adds random noise along the value axis
  geom_jitter(width = 0.2, height = 0, alpha = 0.6, color = "darkblue") +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 4, color = "red") +
  coord_flip() +
  labs(title = "Strip Chart with Means: Average Calories by Route",
       x = "Route", y = "Average Calories") +
  theme_minimal()


29. 3D Visualization of Selected Route (Cedric Scherer Style)

# Load required libraries for 3D visualization
library(plotly)
library(viridis)

# Select specific route: the Scarr Mountain loop from Laragh
selected_route <- "GPX Download: Scarr Mountain – Great Lake Views loop from Laragh"

# Prepare data for this route (sorted by time); only points with a parsed
# timestamp and an altitude are used
df_3d <- gps_data %>%
  filter(track_name == selected_route, !is.na(time), !is.na(altitude)) %>%
  arrange(time) %>%
  mutate(
    time_min = as.numeric(difftime(time, min(time), units = "mins")),
    # Calculate speed from consecutive positions
    lat_diff = latitude - lag(latitude),
    lon_diff = longitude - lag(longitude),
    time_diff = as.numeric(difftime(time, lag(time), units = "secs")),
    # Equirectangular approximation (~111 km per degree). A degree of
    # longitude shrinks with cos(latitude), so the east-west component is
    # scaled accordingly; without it distances are overstated.
    dist_km = sqrt(
      lat_diff^2 + (lon_diff * cos(latitude * pi / 180))^2
    ) * 111,
    speed_kmh = ifelse(time_diff > 0, (dist_km / time_diff) * 3600, NA)
  ) %>%
  filter(!is.na(speed_kmh), speed_kmh < 150)  # filter out erroneous values

# Choose variable for coloring (speed or altitude)
color_var <- df_3d$speed_kmh
color_title <- "Speed (km/h)"

# Create 3D plot (line + points): longitude/latitude on the horizontal axes,
# altitude on the vertical axis, colour taken from `color_var`
plot_ly(df_3d,
        x = ~longitude,
        y = ~latitude,
        z = ~altitude,
        type = 'scatter3d',
        mode = 'lines+markers',
        line = list(
          width = 6,
          color = ~color_var,
          colorscale = 'Viridis',
          showscale = TRUE,
          reversescale = FALSE
        ),
        marker = list(
          size = 2,
          color = ~color_var,
          colorscale = 'Viridis',
          showscale = FALSE
        ),
        # Hover text shown for each point
        text = ~paste(
          "Time:", round(time_min, 1), "min<br>",
          "Speed:", round(speed_kmh, 1), "km/h<br>",
          "Altitude:", round(altitude, 0), "m"
        ),
        hoverinfo = 'text'
        ) %>%
  layout(
    title = list(
      text = paste("3D Route Profile:<br>", selected_route),
      font = list(size = 16, family = "Arial, sans-serif")
    ),
    scene = list(
      xaxis = list(
        title = "Longitude",
        backgroundcolor = "white",
        gridcolor = "lightgray",
        showbackground = TRUE
      ),
      yaxis = list(
        title = "Latitude",
        backgroundcolor = "white",
        gridcolor = "lightgray",
        showbackground = TRUE
      ),
      zaxis = list(
        title = "Altitude (m)",
        backgroundcolor = "white",
        gridcolor = "lightgray",
        showbackground = TRUE
      ),
      camera = list(
        eye = list(x = 1.8, y = 1.8, z = 1.2)  # adjust perspective
      )
    ),
    margin = list(l = 0, r = 0, b = 0, t = 60),
    coloraxis = list(colorbar = list(title = color_title))
  ) %>%
  colorbar(title = color_title, len = 0.8)

Summary

# Closing summary of the dataset. Lines starting with "##" are the output
# recorded when the report was rendered.
cat("=== GPS DATA SUMMARY ===\n\n")
## === GPS DATA SUMMARY ===
cat("Total number of points:", nrow(gps_data), "\n")
## Total number of points: 74820
cat("Number of unique routes:", n_distinct(gps_data$track_name), "\n")
## Number of unique routes: 35
# These two sums count rows (GPS points) of each file type, not files, so the
# labels say "points"; na.rm keeps a missing file_type from turning the
# count into NA.
cat("Number of GPX points:", sum(gps_data$file_type == "gpx", na.rm = TRUE), "\n")
## Number of GPX points: 74820
cat("Number of KML points:", sum(gps_data$file_type == "kml", na.rm = TRUE), "\n")
## Number of KML points: 0
cat("Points with time:", sum(!is.na(gps_data$time)), "\n")
## Points with time: 3492
cat("Points with clock:", sum(!is.na(gps_data$point_clock)), "\n")
## Points with clock: 71328
cat("Altitude range:", round(min(gps_data$altitude, na.rm = TRUE), 1), "-", 
    round(max(gps_data$altitude, na.rm = TRUE), 1), "m above sea level\n")
## Altitude range: -6.1 - 924 m above sea level
cat("Average altitude:", round(mean(gps_data$altitude, na.rm = TRUE), 1), "m above sea level\n")
## Average altitude: 374.8 m above sea level
cat("Latitude range:", round(min(gps_data$latitude, na.rm = TRUE), 4), "-", 
    round(max(gps_data$latitude, na.rm = TRUE), 4), "\n")
## Latitude range: 52.9671 - 54.6502
cat("Longitude range:", round(min(gps_data$longitude, na.rm = TRUE), 4), "-", 
    round(max(gps_data$longitude, na.rm = TRUE), 4), "\n")
## Longitude range: -8.697 - -6.0045
# Calories and distance are reported only when at least one value is present
if(sum(!is.na(gps_data$route_total_calories)) > 0) {
  cat("Calories range:", round(min(gps_data$route_total_calories, na.rm = TRUE)), "-", 
      round(max(gps_data$route_total_calories, na.rm = TRUE)), "kcal\n")
  cat("Average calories:", round(mean(gps_data$route_total_calories, na.rm = TRUE)), "kcal\n")
}
## Calories range: 713 - 4898 kcal
## Average calories: 2048 kcal
if(sum(!is.na(gps_data$route_distance_km)) > 0) {
  cat("Distance range:", round(min(gps_data$route_distance_km, na.rm = TRUE), 2), "-", 
      round(max(gps_data$route_distance_km, na.rm = TRUE), 2), "km\n")
  cat("Average distance:", round(mean(gps_data$route_distance_km, na.rm = TRUE), 2), "km\n")
}
## Distance range: 7.5 - 44.31 km
## Average distance: 20.49 km

Generated on 2026-02-14 using R Markdown and ggplot2